packages <- c("tidyverse","cowplot","ggtree","ggnewscale","ape","phytools","phyloAMR","gridExtra",'ggtext')
#Load Packages
invisible(lapply(packages,library,character.only=T,quietly=T))
#Scripts
source("./lib/consistent_themes.R")
source("./lib/common_functions.R")
source("./scripts/GWAS/analysis/GWAS_analysis_scripts.R")
# Print
Sys.info()
sysname
"Darwin"
release
"24.5.0"
version
"Darwin Kernel Version 24.5.0: Tue Apr 22 19:53:26 PDT 2025; root:xnu-11417.121.6~2/RELEASE_X86_64"
nodename
"msla0572.local"
machine
"x86_64"
login
"root"
user
"kgontjes"
effective_user
"kgontjes"
sessionInfo()
R version 4.5.1 (2025-06-13)
Platform: x86_64-apple-darwin20
Running under: macOS Sequoia 15.5
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Detroit
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] kableExtra_1.4.0 ggtext_0.1.2 gridExtra_2.3 phyloAMR_0.1.0
[5] phytools_2.4-4 maps_3.4.3 ape_5.8-1 ggnewscale_0.5.1
[9] ggtree_3.16.0 cowplot_1.1.3 lubridate_1.9.4 forcats_1.0.0
[13] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.4 readr_2.1.5
[17] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.2 tidyverse_2.0.0
[21] knitr_1.50
loaded via a namespace (and not attached):
[1] tidyselect_1.2.1 viridisLite_0.4.2 farver_2.1.2
[4] optimParallel_1.0-2 fastmap_1.2.0 lazyeval_0.2.2
[7] combinat_0.0-8 digest_0.6.37 timechange_0.3.0
[10] lifecycle_1.0.4 tidytree_0.4.6 magrittr_2.0.3
[13] compiler_4.5.1 rlang_1.1.6 sass_0.4.10
[16] tools_4.5.1 igraph_2.1.4 yaml_2.3.10
[19] phangorn_2.12.1 clusterGeneration_1.3.8 mnormt_2.1.1
[22] scatterplot3d_0.3-44 xml2_1.3.8 RColorBrewer_1.1-3
[25] aplot_0.2.5 expm_1.0-0 withr_3.0.2
[28] numDeriv_2016.8-1.1 grid_4.5.1 colorspace_2.1-1
[31] scales_1.4.0 iterators_1.0.14 MASS_7.3-65
[34] cli_3.6.5 rmarkdown_2.29 treeio_1.32.0
[37] generics_0.1.4 rstudioapi_0.17.1 tzdb_0.5.0
[40] cachem_1.1.0 parallel_4.5.1 ggplotify_0.1.2
[43] yulab.utils_0.2.0 vctrs_0.6.5 Matrix_1.7-3
[46] jsonlite_2.0.0 gridGraphics_0.5-1 hms_1.1.3
[49] patchwork_1.3.0 hues_0.2.0 systemfonts_1.2.2
[52] foreach_1.5.2 jquerylib_0.1.4 glue_1.8.0
[55] codetools_0.2-20 DEoptim_2.2-8 stringi_1.8.7
[58] gtable_0.3.6 quadprog_1.5-8 pillar_1.10.2
[61] htmltools_0.5.8.1 R6_2.6.1 doParallel_1.0.17
[64] evaluate_1.0.3 lattice_0.22-7 gridtext_0.1.5
[67] ggfun_0.1.8 bslib_0.9.0 Rcpp_1.0.14
[70] fastmatch_1.1-6 svglite_2.1.3 coda_0.19-4.1
[73] nlme_3.1-168 xfun_0.52 fs_1.6.6
[76] pkgconfig_2.0.3
df <- readRDS("./data/dataset/df.RDS")
pclustering <- readRDS("./data/asr_clustering/blbli_asr_clustering_df.RDS")
tr <- read.tree("./data/tree/tree.treefile")
gwas_table <- readRDS("./data/GWAS/hits/gwas_hits_table.RDS") %>% subset(nn_qc=="Yes" & locus_tag != '')
gwas_hits <- readRDS("./data/GWAS/hits/gwas_mat.RDS")
nn <- readRDS("./data/nearest_neighbor_comparisons/nn_comparisons.RDS")
df <- left_join(df,gwas_hits %>% mutate(isolate_no = rownames(.))) %>% left_join(.,pclustering)
figure_3.A_tb <- gwas_table %>% select(locus_tag,gene,product,resistance_category,sig_tests,sig_models_simple,sig_phenotypes)
figure_3.A_tb$locus_tag <- recode(figure_3.A_tb$locus_tag,"AA018"="AA018 KPC plasmid cluster","AA552"="AA552 KPC plasmid cluster","OmpK36_loop3_insertion"="Loop 3 insertion in ompK36","OmpK36GD"="GD loop 3 insertion in ompK36",'OmpK36_c25t' = 'C25T SNP in ompK36','OmpK36_truncation_kleborate'='Truncation of ompK36')
figure_3.A_tb$gene <- ifelse(grepl(pattern = 'ompK36',figure_3.A_tb$locus_tag) | figure_3.A_tb$locus_tag =='KPNIH1_RS18665',"ompK36",figure_3.A_tb$gene) %>% {ifelse(is.na(.),"",.)}
figure_3.A_tb$product <- ifelse(figure_3.A_tb$locus_tag == "KPNIH1_RS18100","MdtA/MuxA family multidrug efflux RND \ntransporter periplasmic adaptor subunit",figure_3.A_tb$product)
figure_3.A_tb$product <- ifelse(figure_3.A_tb$gene=='ompK36','porin OmpC',figure_3.A_tb$product)
figure_3.A_tb$product <- ifelse(grepl(pattern = 'KPC',figure_3.A_tb$locus_tag),"blaKPC-associated plasmid",figure_3.A_tb$product)
figure_3.A_tbl <- figure_3.A_tb %>% select(locus_tag,gene,product,resistance_category,sig_tests,sig_models_simple,sig_phenotypes) %>% `colnames<-`(c('Significant Hit','Gene Hit', 'Product','Genotype Category', 'Significant Tests','Significant Models', 'Significant Phenotypes'))
figure_3.A <- figure_3.A_tbl %>% tableGrob(rows = NULL,theme=mytheme_GWAS)
rownames(df) <- df$isolate_no
figure_3.df_mat <- get_gwas_hit_matrix(gwas_table,df)
sig_hits_name <- recode(colnames(figure_3.df_mat),"AA018"="AA018 KPC plasmid cluster","AA552"="AA552 KPC plasmid cluster","OmpK36_loop3_insertion"="Loop 3 insertion in ompK36","OmpK36GD"="GD loop 3 insertion in ompK36",'OmpK36_c25t' = 'C25T SNP in ompK36','OmpK36_truncation_kleborate'='Truncation of ompK36')
figure_3.phylo <- gwas_figures(df,tr,figure_3.df_mat,sig_hits_name = sig_hits_name %>% gsub("_1|_2|_1.2|_2.2|_3|_3.2","",.)) + ylim(NA,550)
figure_3.FB <- plot_grid(figure_3.phylo,labels = "B",label_size=24)
figure_3.FB
figure_3_descriptive_plot_theme <- theme(legend.position="bottom",axis.text = element_text(size=20,colour = "black"),axis.title = element_text(size=22,colour = "black"),legend.title = element_text(size=22,colour = "black"),legend.text = element_text(size=20,colour = "black"),legend.title.align=0.5 ,legend.key.size = unit(0.5, "cm"),legend.key.width = unit(0.5, "cm"))
gwas_nons_freq <- variants_freq(gwas_table$genotype,df)
gwas_nons_freq$locus_tag <- recode(gwas_nons_freq$locus_tag,"AA018"="AA018 KPC plasmid cluster","AA552"="AA552 KPC plasmid cluster","OmpK36_loop3_insertion"="Loop 3 insertion in ompK36","OmpK36GD"="GD loop 3 insertion in ompK36",'OmpK36_c25t' = 'C25T SNP in ompK36','OmpK36_truncation_kleborate'='Truncation of ompK36')
gwas_nons_freq$`Significant Hit` <- gwas_nons_freq$locus_tag
gwas_nons_freq$`Significant Hit` <- factor(gwas_nons_freq$`Significant Hit`, levels=rev(unique(figure_3.A_tb$locus_tag)))
figure_3.C <- freq_plot(gwas_nons_freq) + figure_3_descriptive_plot_theme
# D. FC MIC (MVB * IR overlay)
nn_data_melt <- lapply(gwas_table$genotype,generate_nn_melt_data,nn_data=nn) %>% do.call(rbind,.)
nn_data_melt$locus_tag <- recode(nn_data_melt$locus_tag,"AA018"="AA018 KPC plasmid cluster","AA552"="AA552 KPC plasmid cluster","OmpK36_loop3_insertion"="Loop 3 insertion in ompK36","OmpK36GD"="GD loop 3 insertion in ompK36",'OmpK36_c25t' = 'C25T SNP in ompK36','OmpK36_truncation_kleborate'='Truncation of ompK36')
nn_data_melt$locus_tag <- factor(nn_data_melt$locus_tag, levels=rev(unique(figure_3.A_tb$locus_tag)))
figure_3.D <- nn_plot(nn_data_melt) + figure_3_descriptive_plot_theme
figure_3.FCD <- plot_grid(figure_3.C,figure_3.D ,nrow = 2,labels = c("C","D"),label_size = 28,align = "hv",label_x = -0.02)
figure_3.FCDB <- plot_grid(figure_3.FB,figure_3.FCD,ncol = 2,rel_heights = c(1,1),rel_widths = c(1,0.625))
figure_3.FA <- plot_grid(figure_3.A,labels = "A",label_size=28,align = "hv")
figure_3 <- plot_grid(figure_3.FA,figure_3.FCDB,labels = c("",""),nrow=2,label_size = 28, rel_heights = c(0.275,1))
figure_3